Combining it all to create the maps

  • Create a map of the UK including all four individual countries

  • Create a way to map postcodes to an area in this map

See Kaggle

from MapUtils import *
import os
# geometry data
import fiona
import geopandas as gpd
import json

# general - working with dataframes and numbers
import pandas as pd
import numpy as np

import os

# plot map parts
import folium
import branca.colormap as cm

def load_kml(url_data):
    """
    Loads a KML location file (from local file or url)
    and returns it as a geopandas dataframe

    Args: url_data (file path or url) the location of the kml file

    Returns:
        the geopandas dataframe read from the file

    """
    # register KML as a read/write driver so fiona can open kml files
    fiona.drvsupport.supported_drivers['KML'] = 'rw'

    # load the kml file into a geopandas dataframe.
    # The driver argument has to be the driver *name*: looking the entry up in
    # supported_drivers would hand over the mode string 'rw' instead.
    gdf = gpd.read_file(url_data, driver='KML')

    return gdf
    
def gdf_create_json(gdf, loc_save='./_data', fname='region.json'):
    """
    Saves a geopandas dataframe locally as a GeoJSON file

    Args:
          gdf (geopandas dataframe) the regions to save, read via gdf.Name
              and written with gdf.to_file
          loc_save (directory path) the location to save the data,
              created first if it does not exist yet
          fname (file name string) the file name of the json file
    Returns:
        Names of the regions in the json file (gdf.Name)
        file path to the json file
    """
    # make sure the target directory exists, otherwise the write below fails;
    # an empty loc_save means "current directory" and needs no creation
    if loc_save:
        os.makedirs(loc_save, exist_ok=True)

    # Save as a json file to load in plot_map
    file_path = os.path.join(loc_save, fname)
    gdf.to_file(file_path, driver="GeoJSON")

    return gdf.Name, file_path




def url_KML_map(url_data_paths, json_fname, doScotWales=None):
    """
    The main function used to take one or more KML files and plot them
        Calls load_kml on each path, combines the results,
        calls gdf_create_json to create the json file
        and plot_map to plot the map

    Args: url_data_paths (list of file paths / urls) the kml files to load
          json_fname (string) name of the json file to write, without
            the '.json' extension (saved under './_data')
          doScotWales (list of strings, optional) one entry per path:
            'Scotland' calls split_islands on that data,
            'Wales' calls joinWalesRegions on that data,
            'Northern Ireland' keeps only the 'BT' region and renames it,
            anything else (or a missing entry) leaves the data as loaded
    Returns:
        The map returned by plot_map
        The dataframe (columns 'County' and 'Data') that was plotted;
            'Data' is random integers used only to check the plot works

    """
    # None stands in for "no special handling"; a list default would be a
    # single object shared between calls
    labels = [] if doScotWales is None else list(doScotWales)

    frames = []
    for i, url_data in enumerate(url_data_paths):
        gdf = load_kml(url_data)

        # paths without a matching label are used as loaded
        label = labels[i] if i < len(labels) else None

        if label == 'Scotland':
            gdf = split_islands(gdf)
        elif label == 'Wales':
            gdf = joinWalesRegions(gdf)
        elif label == 'Northern Ireland':
            # copy so the rename works on an independent frame rather than
            # on a slice of the loaded data
            gdf = gdf.loc[gdf.Name == 'BT'].copy()
            gdf['Name'] = gdf['Name'].replace({'BT': 'Northern Ireland'})
        frames.append(gdf)

    # combine everything in one go instead of growing a frame per iteration
    if frames:
        gdfAll = pd.concat(frames, ignore_index=True)
    else:
        gdfAll = gpd.GeoDataFrame()

    # create a json file for plotting and gives back names of regions
    fname = json_fname + '.json'
    map_names, json_path = gdf_create_json(gdfAll, loc_save='./_data', fname=fname)

    # create a dataframe to check it all works
    df = pd.DataFrame(columns=['County', 'Data'])

    # add the names of the regions
    df['County'] = map_names
    # create some random data to plot
    df['Data'] = np.random.randint(0, 100, len(df))

    m = plot_map(df, what_to_plot='Data', region_to_plot='County',
                 json_path=json_path)

    return m, df

def split_islands(df):
    """
    Takes a gpd dataframe of Scotland which needs reducing in size as too big to plot
        The Highlands and Islands multi-polygon is split (exploded) from a
        multi-polygon into a set of polygons, the first few are selected and
        then merged (dissolve) and combined with the rows that precede it

    Args: df (geopandas dataframe) of Scottish data; the islands region is
          expected at row position 7
    Returns:
        df (geopandas dataframe) reduced in size: rows 0-6 unchanged followed
        by the reduced islands row (any rows after position 7 are not kept)

    """
    from shapely.geometry.polygon import Polygon
    from shapely.geometry.multipolygon import MultiPolygon

    # the islands region sits at row position 7 (the 8th row)
    islands_pos = 7
    gdf = df.iloc[islands_pos:islands_pos + 1].copy()

    # wrap a plain Polygon in a MultiPolygon so every geometry has the same type
    gdf["geometry"] = [MultiPolygon([feature]) if isinstance(feature, Polygon)
                       else feature for feature in gdf["geometry"]]

    # explode the islands into a number of polygons from one multipolygon
    gdf_parts = gdf.explode(column='geometry', ignore_index=True, index_parts=False)

    # take the first 3 elements only and dissolve back to one multi polygon
    df7new = gdf_parts.iloc[0:3].dissolve()

    # create the new geopanda with the rows before the islands plus the new
    # islands row; positional slicing keeps this consistent with the
    # positional selection above whatever the index labels are
    dfnew = pd.concat([df.iloc[:islands_pos], df7new]).reset_index(drop=True)

    return dfnew

def joinWalesRegions(df):
    """
    Groups the Welsh districts into larger regions and merges their shapes

        ' Council' is removed from each Name, the remaining name is mapped
        to its region with the dictionary below (names that are not in the
        dictionary are left as they are), and rows sharing a Name are then
        merged (dissolve) into one row per region

    Args: df (geopandas dataframe) of Welsh data with a Name column
    Returns:
        df (geopandas dataframe) with one row per region; the dataframe
        passed in is not modified

    """
    wales_region_dict={ 
         'Wrexham':'North Wales', 
         'Conwy':'North Wales', 
         'Gwynedd':'North Wales', 
         'Isle of Anglesey':'North Wales',
         'Flintshire':'North Wales', 
         'Denbighshire':'North Wales', 
         'Powys':'Mid Wales',
         'Ceredigion':'Mid Wales',
         'Carmarthenshire':'South West Wales',
         'Swansea':'South West Wales',
         'Neath Port Talbot':'South West Wales',
         'Pembrokeshire':'South West Wales',
         'Bridgend':'South Wales',
         'Vale of Glamorgan':'South Wales',
         'Cardiff':'South Wales',
         'Rhondda Cynon Taf':'South Wales',
         'Merthyr Tydfil':'South Wales',
         'Caerphilly':'South East Wales',
         'Newport':'South East Wales',  
         'Torfaen':'South East Wales', 
         'Monmouthshire':'South East Wales',    
         'Blaenau Gwent':'South East Wales',
        }

    # work on a copy so the caller's dataframe keeps its original names
    df = df.copy()

    # plain-text removal; regex=False keeps this independent of the
    # pandas default for str.replace
    df['Name'] = df['Name'].str.replace(' Council', '', regex=False)
    df['Name'] = df['Name'].replace(wales_region_dict)

    # merge all districts that now share a region name
    df = df.dissolve(by='Name', as_index=False)

    return df


# KML sources, one per entry of `country` below and in the same order.
# Local files are located with os.path.join so the paths also resolve on
# systems that do not use '\\' as the separator.
kml_dir = os.path.join('.', '_data')
url_data_paths = ['https://www.doogal.co.uk/kml/counties/Counties.kml',
                  os.path.join(kml_dir, 'scotland_preg_2011.KML'),
                  os.path.join(kml_dir, 'WalesDistrict.kml'),
                  'https://www.doogal.co.uk/kml/UkPostcodes.kml']

# tells url_KML_map which per-country clean-up to apply to each source
country = ['England', 'Scotland', 'Wales', 'Northern Ireland']

m, df_map_pc = url_KML_map(url_data_paths, 'combined_json', doScotWales=country)
# display the map as the cell output
m
Make this Notebook Trusted to load map: File -> Trust Notebook
gdfAll
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 gdfAll

NameError: name 'gdfAll' is not defined


import pandas as pd

# Postcode lookup files, one or more per country. Built with os.path.join so
# the paths also resolve on systems that do not use '\\' as the separator.
postcode_dir = os.path.join('.', '_data')
file_paths = [os.path.join(postcode_dir, csv_name) for csv_name in (
    "England1 postcodes.csv",
    "England2 postcodes.csv",
    "Wales postcodes.csv",
    "Scotland postcodes.csv",
    "BT postcodes.csv",
)]

# read every file first, keeping only the columns needed for the mapping
postcode_frames = []
for file in file_paths:
    print(file)
    df_temp = pd.read_csv(file,
             usecols=['Postcode','Country','District','District Code','County'])
    postcode_frames.append(df_temp)

# a single concat avoids re-copying the growing dataframe on every iteration;
# each file's own row index is kept, as before (no ignore_index)
postcode_df = pd.concat(postcode_frames)


.\_data\England1 postcodes.csv
.\_data\England2 postcodes.csv
.\_data\Wales postcodes.csv
.\_data\Scotland postcodes.csv
.\_data\BT postcodes.csv
postcode_df
Postcode County District District Code Country
0 AL1 1AG Hertfordshire St Albans E07000240 England
1 AL1 1AJ Hertfordshire St Albans E07000240 England
2 AL1 1AR Hertfordshire St Albans E07000240 England
3 AL1 1AS Hertfordshire St Albans E07000240 England
4 AL1 1AT Hertfordshire St Albans E07000240 England
... ... ... ... ... ...
62480 BT99 1EA NaN Belfast N09000003 Northern Ireland
62481 BT99 1EB NaN NaN NaN Northern Ireland
62482 BT99 1ED NaN NaN NaN Northern Ireland
62483 BT99 1EE NaN NaN NaN Northern Ireland
62484 BT99 1EF NaN NaN NaN Northern Ireland

1841438 rows × 5 columns